<?php
/**
 * Using QueryPath to parse an OpenDocument Text (ODT) file.
 *
 * The ODT format is a standard way of representing word processing documents.
 * It is based on a combination of technologies, one of which is XML. Document
 * text is stored in a structural (semantic) XML document, while other information,
 * such as style sheets, is stored in auxiliary files.
 *
 * This example shows how ODT files can be opened and manipulated using QueryPath.
 *
 * The file used here was generated by OpenOffice 3.x. Other word processors are 
 * also capable of generating ODT files.
 *
 * 
 * @author M Butcher <matt@aleph-null.tv>
 * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license.
 */

/** Include main QP library. */
require_once '../src/QueryPath/QueryPath.php';

// Where to read the document's XML body from.
//
// With the Zip extension compiled in, the body can be read straight out of
// the .odt archive (the entry inside an ODT archive is named content.xml):
//   $contentPath = 'zip://openoffice.odt#content.xml';
//
// On systems without the Zip extension, extract content.xml from the archive
// by hand and point at the extracted copy instead:
$contentPath = 'tmp/content.xml';

// Parse the XML; $doc is the QueryPath object used by the rest of the script.
$doc = qp($contentPath);

echo 'Contents:' . PHP_EOL;

// Show the "outline": every heading (<text:h>) in the document, indented
// according to its level.
foreach ($doc->find('text|h') as $header) {
  // The level is read from the trailing "_"-separated piece of the style
  // name (e.g. "Heading_20_2" yields "2"). The cast keeps explode() happy
  // if the attribute is missing.
  $style = (string) $header->attr('text:style-name');
  $attr_parts = explode('_', $style);
  $suffix = array_pop($attr_parts);

  // Only an all-digit suffix is a usable level. Anything else (empty string,
  // a style such as "Title") falls back to level 0 rather than handing
  // str_repeat() a non-numeric string, which is a TypeError on PHP 8.
  $level = ctype_digit($suffix) ? (int) $suffix : 0;

  $out = str_repeat('  ', $level) . '- ' . $header->text();
  print $out . PHP_EOL;
}

// ODT list markup is verbose: each item is a <text:p text:style-name="P1">
// nested inside a <text:list text:style-name="L1">. This selector takes the
// first matching list and then descends to its item paragraphs.
$bulletSelector = 'text|list[text|style-name="L1"]:first text|p[text|style-name="P1"]';

echo PHP_EOL . "Bullet List" . PHP_EOL;

// Restart from the top of the document before searching for the list items.
$bulletItems = $doc->top()->find($bulletSelector);
foreach ($bulletItems as $item) {
  echo '  * ' . $item->text() . PHP_EOL;
}

// Ordered list: item paragraphs styled "P2" inside the first list styled "L2".
// The numbering is generated here rather than read from the document.
$orderedSelector = 'text|list[text|style-name="L2"]:first text|p[text|style-name="P2"]';

echo PHP_EOL . "Ordered List" . PHP_EOL;

$number = 0;
// Restart from the top of the document before searching for the list items.
$orderedItems = $doc->top()->find($orderedSelector);
foreach ($orderedItems as $entry) {
  $number += 1;
  echo '  ' . $number . '. ' . $entry->text() . PHP_EOL;
}